# -*- coding: utf-8 -*-
# @Time        :2024/11/3 下午3:55
# @Author      :文刀水寿
# @File        : 05_数据输出_输出为python对象.py
"""
 @Description :
"""
from pyspark import SparkConf, SparkContext
import os
import json

# Point PySpark worker processes at the local Python interpreter.
os.environ['PYSPARK_PYTHON'] = "D:/Python/python.exe"

# Build a local Spark context using all available cores.
spark_conf = SparkConf().setMaster("local[*]").setAppName("test_spark")
spark_ctx = SparkContext(conf=spark_conf)

# Source RDD holding five integers.
numbers = spark_ctx.parallelize([1, 2, 3, 4, 5])

# collect(): pull the entire RDD back to the driver as a Python list.
collected = numbers.collect()
print(collected)
print(type(collected))

# reduce(): pairwise-fold the elements of the RDD; here it computes their sum.
total = numbers.reduce(lambda x, y: x + y)
print(total)

# take(n): return the first n elements of the RDD as a list.
first_three = numbers.take(3)
print(first_three)

# count(): number of records in the RDD, returned as an int.
count = numbers.count()
print(f"rdd内有{count}条数据")
spark_ctx.stop()
